# Fix the random seed so reruns are reproducible
set.seed(1996)

# Clear the workspace, keeping only the objects named 'script', 'scripts'
# and 'log_file'
rm(list = setdiff(ls(), c('script', 'scripts', 'log_file')))

# Load the candidate-level master data
master <- readRDS('Master_web.rds')

# Keep incumbents (inc == 1) from partisan primary states only, i.e. every
# state except CA, WA and LA, and retain just the columns used downstream.
inc_cols <- c('candidate', 'state_postal', 'cd', 'candnumber',
              'year', 'cand_party', 'FECCandID', 'raceid',
              'cfscore', 'web_score', 'candvotes')
keep_row <- with(master,
                 inc == 1 & !(state_postal == 'CA' |
                                state_postal == 'WA' |
                                state_postal == 'LA'))
# Rows whose condition evaluates to NA are dropped, as subset() would do
master_inc <- master[!is.na(keep_row) & keep_row, inc_cols, drop = FALSE]
# Party membership masks, reused for the means and the position coding below
is_dem <- master$cand_party == 'Democrat'
is_rep <- master$cand_party == 'Republican'

# Party means of the WEB scores and of the CF scores, taken over all
# candidates in `master` (missing scores are ignored)
dem_mean <- mean(master$web_score[is_dem], na.rm = TRUE)
rep_mean <- mean(master$web_score[is_rep], na.rm = TRUE)

dem_mean_cf <- mean(master$cfscore[is_dem], na.rm = TRUE)
rep_mean_cf <- mean(master$cfscore[is_rep], na.rm = TRUE)

# Position relative to the party mean, WEB scores.
# Democrats at or below their party mean and Republicans at or above theirs
# are coded 'Extreme'; the rest of each party is 'Moderate'. Candidates of any
# other party, or with a missing score, stay NA.
web_position <- rep(NA, nrow(master))
web_position[is_dem & master$web_score <= dem_mean] <- 'Extreme'
web_position[is_dem & master$web_score > dem_mean] <- 'Moderate'
web_position[is_rep & master$web_score >= rep_mean] <- 'Extreme'
web_position[is_rep & master$web_score < rep_mean] <- 'Moderate'
master$challenger_position_web <- web_position

# Same coding rule, applied to the CF scores
cf_position <- rep(NA, nrow(master))
cf_position[is_dem & master$cfscore <= dem_mean_cf] <- 'Extreme'
cf_position[is_dem & master$cfscore > dem_mean_cf] <- 'Moderate'
cf_position[is_rep & master$cfscore >= rep_mean_cf] <- 'Extreme'
cf_position[is_rep & master$cfscore < rep_mean_cf] <- 'Moderate'
master$challenger_position_cfscore <- cf_position

# Per-challenger fields appended to every incumbent row: challenger name,
# votes received, WEB score, CF score, and the position coding for each score.
columns <- c('chal_candidate', 'chal_candvotes', 'chal_webscore',
             'chal_cfscore', 'challenger_position_web', 'challenger_position_cfscore')

# Number of challenger slots per incumbent (18 is the max number of
# challengers in one election)
max_challengers <- 18

# Slot-major column names: all six fields for challenger 1, then all six for
# challenger 2, and so on (e.g. chal_candidate_1, ..., chal_cfscore_18)
chal_colnames <- paste(rep(columns, times = max_challengers),
                       rep(seq_len(max_challengers), each = length(columns)),
                       sep = '_')

# Append the empty columns directly after the existing ones. Positions are
# derived from ncol() rather than hard-coded, so the names always land on the
# newly created columns even if the incumbent column selection changes.
new_cols <- ncol(master_inc) + seq_along(chal_colnames)
master_inc[, new_cols] <- NA
colnames(master_inc)[new_cols] <- chal_colnames

# Loop over incumbents to add challenger data.
# For each incumbent, the other candidates in the same race (same raceid,
# different candidate) are ranked by votes received, highest first, and the
# j-th ranked challenger is written into the '*_j' columns of that row.
#
# Values are written one column at a time, by column name, so every column
# keeps its own type. Combining the name, the numbers and the position labels
# with c() would coerce the whole row to character and store votes and scores
# as strings.

# Destination column prefix -> source column in `master`
chal_source <- c(chal_candidate = 'candidate',
                 chal_candvotes = 'candvotes',
                 chal_webscore = 'web_score',
                 chal_cfscore = 'cfscore',
                 challenger_position_web = 'challenger_position_web',
                 challenger_position_cfscore = 'challenger_position_cfscore')
# Prefixes stored as numbers; all remaining fields are stored as character
chal_numeric <- c('chal_candvotes', 'chal_webscore', 'chal_cfscore')
# Number of challenger slots available in master_inc
n_slots <- sum(grepl('^chal_candidate_[0-9]+$', colnames(master_inc)))

for (i in seq_len(nrow(master_inc))) {
  # All other candidates in the race except the incumbent; which() drops rows
  # where the comparison is NA
  same_race <- master$raceid == master_inc$raceid[i]
  not_self <- master$candidate != master_inc$candidate[i]
  temp <- master[which(same_race & not_self), , drop = FALSE]

  # Skip if no challengers
  if (nrow(temp) == 0) {
    next
  }

  # Fail loudly instead of silently creating unnamed columns
  if (nrow(temp) > n_slots) {
    stop('Race ', as.character(master_inc$raceid[i]), ' has ', nrow(temp),
         ' challengers but only ', n_slots, ' challenger slots exist',
         call. = FALSE)
  }

  # Sort by votes received, highest first
  temp <- temp[order(-temp$candvotes), , drop = FALSE]

  # Loop over challengers, then over the fields of each challenger
  for (j in seq_len(nrow(temp))) {
    for (prefix in names(chal_source)) {
      value <- temp[[chal_source[[prefix]]]][j]
      if (prefix %in% chal_numeric) {
        value <- as.numeric(value)
      } else {
        # as.character() also guards against factor columns being stored as
        # their integer codes
        value <- as.character(value)
      }
      master_inc[[paste(prefix, j, sep = '_')]][i] <- value
    }
  }
}

# Save the incumbent-challenger data frame. The readr namespace is spelled out
# because no library(readr) call appears in this script, so the bare function
# name only resolves when readr was attached elsewhere.
readr::write_rds(master_inc, 'incumbent_challengerdf.rds')
